This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(lubridate) # Working with Dates
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:lubridate':
## 
##     stamp
## The following object is masked from 'package:dplyr':
## 
##     rename
library(reshape2)# Reshaping the data
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:reshape':
## 
##     colsplit, melt, recast
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
## The following objects are masked from 'package:reshape':
## 
##     expand, smiths
library(tidyverse)# brings in ggplot2 and dplyr together
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ stringr 1.4.0
## ✓ tibble  3.0.4     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x tidyr::expand()          masks reshape::expand()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x reshape::rename()        masks dplyr::rename()
## x lubridate::setdiff()     masks base::setdiff()
## x reshape::stamp()         masks lubridate::stamp()
## x lubridate::union()       masks base::union()
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:purrr':
## 
##     transpose
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
## The following object is masked from 'package:reshape':
## 
##     melt
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(zoo) ## datetime
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(bizdays) # business day
## 
## Attaching package: 'bizdays'
## The following object is masked from 'package:stats':
## 
##     offset
library(ggeasy) # for easy ggplot editing
library(harrypotter) # for palettes
library(bizdays)
library(timeDate)
## Visualizations
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:reshape':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
library(Rcpp)
library(tsibble)
## 
## Attaching package: 'tsibble'
## The following object is masked from 'package:zoo':
## 
##     index
## The following object is masked from 'package:data.table':
## 
##     key
## The following object is masked from 'package:lubridate':
## 
##     interval
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

1.Loading Data

dat_train = data.frame(read.csv("forecast_traindata.csv"))
dat_test = data.frame(read.csv("forecast_testdata.csv"))
dat_train$application_date<-as.Date(dat_train$application_date,format = "%d-%m-%Y")
dat_test$application_date<-as.Date(dat_test$application_date,format = "%d-%m-%Y")

2.Data Integrity / Quality

Checking Structure and Summary of the data
str(dat_train)
## 'data.frame':    79922 obs. of  6 variables:
##  $ application_date: Date, format: "2017-04-01" "2017-04-01" ...
##  $ segment         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ branch_id       : int  1 3 5 7 8 9 10 11 13 14 ...
##  $ state           : chr  "WEST BENGAL" "DELHI" "KARNATAKA" "WEST BENGAL" ...
##  $ zone            : chr  "EAST" "NORTH" "SOUTH" "EAST" ...
##  $ no_of_applicants: int  40 58 10 2 13 11 0 9 1 0 ...
summary(dat_train)
##  application_date        segment        branch_id        state          
##  Min.   :2017-04-01   Min.   :1.000   Min.   :  1.0   Length:79922      
##  1st Qu.:2017-10-26   1st Qu.:1.000   1st Qu.: 36.0   Class :character  
##  Median :2018-05-13   Median :1.000   Median : 82.0   Mode  :character  
##  Mean   :2018-05-10   Mean   :1.168   Mean   :118.8                     
##  3rd Qu.:2018-11-25   3rd Qu.:1.000   3rd Qu.:248.0                     
##  Max.   :2019-06-23   Max.   :2.000   Max.   :271.0                     
##  NA's   :2490         NA's   :2490    NA's   :15514                     
##      zone           no_of_applicants 
##  Length:79922       Min.   :    0.0  
##  Class :character   1st Qu.:    0.0  
##  Mode  :character   Median :   17.0  
##                     Mean   :  184.9  
##                     3rd Qu.:   60.0  
##                     Max.   :13787.0  
##                     NA's   :2490
There are six columns and 79922 records :1. application_date 2. segment 3.branch_id 4.state 5.zone 6.no_of_applicants.
Applicants started apply for application from 1st April 2017 and lastest is 23 th June 2019.
Dimension of the dataset
dim(dat_train)
## [1] 79922     6
glimpse(dat_train)
## Rows: 79,922
## Columns: 6
## $ application_date <date> 2017-04-01, 2017-04-01, 2017-04-01, 2017-04-01, 2017…
## $ segment          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ branch_id        <int> 1, 3, 5, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19,…
## $ state            <chr> "WEST BENGAL", "DELHI", "KARNATAKA", "WEST BENGAL", "…
## $ zone             <chr> "EAST", "NORTH", "SOUTH", "EAST", "EAST", "EAST", "EA…
## $ no_of_applicants <int> 40, 58, 10, 2, 13, 11, 0, 9, 1, 0, 5, 1, 0, 0, 17, 8,…
Converting applicate date into relevant date format.
dat_train$application_date<-as.Date(dat_train$application_date,format = "%d-%m-%Y")
str(dat_train)
## 'data.frame':    79922 obs. of  6 variables:
##  $ application_date: Date, format: "2017-04-01" "2017-04-01" ...
##  $ segment         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ branch_id       : int  1 3 5 7 8 9 10 11 13 14 ...
##  $ state           : chr  "WEST BENGAL" "DELHI" "KARNATAKA" "WEST BENGAL" ...
##  $ zone            : chr  "EAST" "NORTH" "SOUTH" "EAST" ...
##  $ no_of_applicants: int  40 58 10 2 13 11 0 9 1 0 ...

3.Feature Engineering :creating new features from existing data

dat_train$Day<-format(dat_train$application_date,"%d")
dat_train$WeekOfDay = format(dat_train$application_date, format = "%a") ## Getting days of week 
dat_train$Weekly = week(dat_train$application_date) #Getting the week of date
dat_train$Month<-format(dat_train$application_date,"%b")
dat_train$Year<-format(dat_train$application_date,"%Y")
dat_train$MonYr = format(dat_train$application_date, "%b-%Y") #Extracting yearly
dat_train$YearQrt = as.yearqtr(dat_train$application_date)#Extracting yearquarterly
dat_train$qrtr<-quarters(dat_train$application_date) # extracting quarter
dat_train$YearQrt = as.character(as.yearqtr(dat_train$application_date))#Extracting quarterly
dat_train$Qrt = as.character(quarters(dat_train$application_date))#Extracting quarterly 
Train dataset
dat_train

split the dataframe

split_data <- split(dat_train, f = dat_train$segment) 

generate two different dataframe

df_segment1<-dat_train %>%
               filter(segment==1)
dim(df_segment1)
## [1] 64408    15
DT <- rbindlist(split_data[2])
df_segment2<-as.data.frame(DT)
dim(df_segment2)
## [1] 13024    15

creating new dataframe df_segment2

df_segment2<-dplyr::select(df_segment2,-c("branch_id","zone"))
dim(df_segment2)
## [1] 13024    13
df_segment2

4. Data Visualization

#segmnet1
ggplot(df_segment1,aes (x=reorder(state,no_of_applicants),
              y=no_of_applicants)) + geom_bar(stat="summary",fun="sum", width=0.5, fill = "Darkred")+
  labs(x="state",y = "no_of_applicants", fill="state") +
  ggtitle("state wise contribution in Segment1 ")+
  theme_bw()+
  theme(axis.text.x=element_text(angle=60,hjust=1))

The above bar chart shows that Number of applicants in different state of India from 2017 to 2019.In segment 1,Maharashtra accounts for a high number of applicants, followed by Gujarat and West Bengal.There was very small number of applicants in Chhattisgarh followed by Kerala and Haryana.
#segment2
ggplot(df_segment2,aes (x=reorder(state ,no_of_applicants),
              y=no_of_applicants)) + geom_bar(stat="summary",fun="sum", width=0.5, fill = "Darkred")+
  labs(x="state",y = "no_of_applicants", fill="state") +
  ggtitle("state wise contribution in Segment2 ")+
  theme_bw()+
  theme(axis.text.x=element_text(angle=60,hjust=1))

In segment 2,Tamil Nadu topped for a high number of applicants, followed by Karnataka ,Bihar, Orrisa and West Bengal.There was zero applicants from Haryana and Punjab. Average Percentage of applicants in segment2 w.r.t year
p17<-df_segment2 %>%
               filter(Year==2017)
aggData <- aggregate(x =p17$`no_of_applicants`,
                     by=list(state_wise = p17$state),
                     FUN = mean)
ggplot(data = aggData, aes(x = state_wise, y = prop.table(stat(aggData$x)), fill = state_wise, label = scales::percent(prop.table(stat(aggData$x))))) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_text(stat = 'identity', position = position_dodge(.9),  vjust = -0.5, size = 2) + 
  scale_y_continuous(labels = scales::percent) + 
  theme_bw()+
  theme(axis.text.x=element_text(angle=60,hjust=1))+
  labs(x = 'state', y = 'applicants in percntage', fill = 'state') + 
  ggtitle("Percentage of Applicants in segment 2 (2017)")

In 2017,Tamil Nadu, Orissa and West Bengal contributed to over 60% of applications for segment 2
#2018
p18<-df_segment2 %>%
               filter(Year==2018)
aggData <- aggregate(x =p18$`no_of_applicants`,
                     by=list(state_wise = p18$state),
                     FUN = mean)
ggplot(data = aggData, aes(x = state_wise, y = prop.table(stat(aggData$x)), fill = state_wise, label = scales::percent(prop.table(stat(aggData$x))))) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_text(stat = 'identity', position = position_dodge(.9),  vjust = -0.5, size = 2) + 
  scale_y_continuous(labels = scales::percent) + 
  theme_bw()+
  theme(axis.text.x=element_text(angle=60,hjust=1))+
  labs(x = 'state', y = 'applicants in percntage', fill = 'state') + 
  ggtitle("Percentage of Applicants segment 2 (2018)")

As we can see from the above graph the number of applicants from Orrissa and West Bengal dropped suddenly in 2018.By contrast,Bihar and Assam shot up quickly in the same year,while Tamil Nadu remained steady
#2019
p19<-df_segment2 %>%
               filter(Year==2019)
aggData <- aggregate(x =p19$`no_of_applicants`,
                     by=list(state_wise = p19$state),
                     FUN = mean)
ggplot(data = aggData, aes(x = state_wise, y = prop.table(stat(aggData$x)), fill = state_wise, label = scales::percent(prop.table(stat(aggData$x))))) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_text(stat = 'identity', position = position_dodge(.9),  vjust = -0.5, size = 2) + 
  scale_y_continuous(labels = scales::percent) + 
  theme_bw()+
  theme(axis.text.x=element_text(angle=60,hjust=1))+
  labs(x = 'state', y = 'applicants in percntage', fill = 'state') + 
  ggtitle("Percentage of Applicants segment 2 (2018)")

According to the graph the number of applicants from Orrissa and West Bengal went down continually from 2017 to 2019.By contrast, In last two years,number of applicants from Bihar rose significanlty , while number of applicants in Tamil Nadu increased slightly in the year 2019. Average percentage of applicants in each state in segment 1 w.r.t years.
segment17<-df_segment1 %>%
               filter(Year==2017)
aggData <- aggregate(x =segment17$`no_of_applicants`,
                     by=list(state_wise = segment17$state),
                     FUN = mean)
ggplot(data = aggData, aes(x = state_wise, y = prop.table(stat(aggData$x)), fill = state_wise, label = scales::percent(prop.table(stat(aggData$x))))) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_text(stat = 'identity', position = position_dodge(.9),  vjust = -0.5, size = 2) + 
  scale_y_continuous(labels = scales::percent) + 
  theme_bw()+
  theme(axis.text.x=element_text(angle=60,hjust=1))+
  labs(x = 'state', y = 'applicants in percntage', fill = 'state') + 
  ggtitle("Percentage of Applicants segment 1 (2017)")

The given bar graph illustrates the number of applicants in each state of India in th year 2017 under segment 1 category.We can see from the graph that Delhi has maximum applicants.while West Bengal, Gujurat has 1/2 applicant of Delhi.There was zero applicants from Chattisgarh,Haryana and Uttar Pradesh.
#2018
segment18<-df_segment1 %>%
               filter(Year==2018)
aggData <- aggregate(x =segment18$`no_of_applicants`,
                     by=list(state_wise = segment18$state),
                     FUN = mean)
ggplot(data = aggData, aes(x = state_wise, y = prop.table(stat(aggData$x)), fill = state_wise, label = scales::percent(prop.table(stat(aggData$x))))) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_text(stat = 'identity', position = position_dodge(.9),  vjust = -0.5, size = 2) + 
  scale_y_continuous(labels = scales::percent) + 
  theme_bw()+
  theme(axis.text.x=element_text(angle=60,hjust=1))+
  labs(x = 'state', y = 'applicants in percntage', fill = 'state') + 
  ggtitle("Percentage of Applicants segment 1 (2018)")

according the above graph, In year 2018, As we can see in 2018, there were a few applicants in Chhattisgarh, Haryana, and Astra Pradesh, as opposed to 2017, when there were no applicants from those mentioned states.Delhi remained
#2019
segment19<-df_segment1 %>%
               filter(Year==2019)
aggData <- aggregate(x =segment19$`no_of_applicants`,
                     by=list(state_wise = segment19$state),
                     FUN = mean)
ggplot(data = aggData, aes(x = state_wise, y = prop.table(stat(aggData$x)), fill = state_wise, label = scales::percent(prop.table(stat(aggData$x))))) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_text(stat = 'identity', position = position_dodge(.9),  vjust = -0.5, size = 2) + 
  scale_y_continuous(labels = scales::percent) + 
  theme_bw()+
  theme(axis.text.x=element_text(angle=60,hjust=1))+
  labs(x = 'state', y = 'applicants in percntage', fill = 'state') + 
  ggtitle("Percentage of Applicants segment 1 (2019)")

As we can see from the graph, Number of applicants from different states in past years was’t change much except Gujurat ,Telangana and West Bengal applicants declined slowly in 2019.
df1 <- as_tsibble( df_segment1 ,
index=application_date,
key=c(segment, branch_id, state, zone))
monthly_view <- df1 %>%
group_by_key() %>%
index_by(Year_Month = ~ yearmonth(.)) %>% # monthly aggregates
summarise(
Total_Applications = sum(no_of_applicants, na.rm = TRUE)
)
monthly_view1 <- monthly_view %>%
group_by(zone) %>%
index_by(Year_Month) %>% # monthly aggregates
summarise(
Total_Applications = sum(Total_Applications, na.rm = TRUE)
)
date_range = as.Date(c('01-03-2017','06-07-2019'), "%d-%m-%Y")
fig <- plot_ly(monthly_view1, x=~Year_Month,
y=~Total_Applications, color=~zone,
type='scatter',
mode='lines') %>%
layout(title = 'Monthly Applications v/s Zone',
xaxis = list(title = 'Months', range=date_range,
ticktext=(as.character(monthly_view1[1:27,]$Year_Month)),
tickvals=(monthly_view1[1:27,]$Year_Month),
tickmode='array'),
yaxis = list(title = 'Applicants'))
fig
Here we see the applications per month by the zone.All the zones appear to move together with the west zone showing most exaggerated movement.
mdl_dt<-dat_train[,c("segment","application_date","no_of_applicants")]
cs_trend<-mdl_dt%>%group_by(segment,application_date)%>%summarise(No_cases = sum(no_of_applicants),.groups='drop')

ggplot(cs_trend,aes(x = application_date,y = No_cases,color = segment))+geom_line(stat = "identity")+labs(title = "Trend of Applications by Segment")+scale_x_date(date_labels = "%b-%Y")+facet_grid(segment~.,scale = "free")
## Warning: Removed 1 row(s) containing missing values (geom_path).

Here we plot the trend of segment1 and 2 w.r.t No of applicants across the years. The segments differ from each other; Segment 2 seems to indicate a cyclic trend, whereas segment 1 exhibits substantial peaks initially.
library(ggplot2)
#segment1
level_orderM<-c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec") 
month_summary <-df_segment1 %>% 
                    group_by(Month) %>%
                    summarise(no_of_applicants= sum(no_of_applicants),.groups='drop') 
month_summary %>%
         ggplot(aes(x =factor(Month,level=level_orderM), y = no_of_applicants, fill = Month)) +
         geom_col() +
         scale_fill_hp_d(option = "LunaLovegood") +
         scale_y_continuous(limits = c(0, 300000), expand = c(0,0)) +
         labs(title = "Total number of applicants in  each month in segment1", x = "Monthly", y = "Total applicants") 

Over a span of three years, Oct month shows a peak in applicants for segment 1
#segment2

level_orderM<-c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec") 
month_summary <-df_segment2 %>% 
                    group_by(Month) %>%
                    summarise(no_of_applicants= sum(no_of_applicants),.groups='drop') 
month_summary %>%
         ggplot(aes(x =factor(Month,level=level_orderM), y = no_of_applicants, fill = Month)) +
         geom_col() +
         scale_fill_hp_d(option = "Slytherin") +
         scale_y_continuous(limits = c(0, 1231731), expand = c(0,0)) +
         labs(title = "Total number of applicants in  each month in segment2", x = "Monthly", y = "Total applicants") 

Whereas, segment 2 is mostly stationary barring slight increase in Mar, May and Jun.
level_orderD<-c("Mon","Tue","Wed","Thu","Fri","Sat","Sun") 
wday_summary <- df_segment1 %>% 
                group_by(YearQrt) %>%
                summarise(no_of_applicants = sum(no_of_applicants),.groups='drop') 
wday_summary %>%
  ggplot(aes(x =YearQrt ,y = no_of_applicants, fill = YearQrt)) +
  geom_col() +
  scale_fill_hp_d(option = "Ravenclaw") +
  scale_y_continuous(limits = c(0, 400000), expand = c(0,0)) +
  labs(title = "Total segment 1 applicants by quarter ",x = "Quarterly")

Here we notice a clear uptick in applicants in the neighbourhood of 2018 4th quarter. The number of applicants increase gradually from 2019 Q2 till 2018 Q4 then fell suddenly.
# segment2
level_orderD<-c("Mon","Tue","Wed","Thu","Fri","Sat","Sun") 
wday_summary <- df_segment2 %>% 
                group_by(YearQrt) %>%
                summarise(no_of_applicants = sum(no_of_applicants),.groups='drop') 
wday_summary %>%
  ggplot(aes(x =YearQrt ,y = no_of_applicants, fill = YearQrt)) +
  geom_col() +
  scale_fill_hp_d(option = "DracoMalfoy") +
  scale_y_continuous(limits = c(0, 2000000), expand = c(0,0)) +
  labs(title = "Total applicants in segment 2 by Quarter ",x = "Quarterly")

Segment 2 shows a similar trend, with an uptick 2018 Q3 and 2019 Q1.
level_orderD<-c("Mon","Tue","Wed","Thu","Fri","Sat","Sun") 
wday_summary <- df_segment1 %>% 
                group_by(WeekOfDay) %>%
                summarise(no_of_applicants = mean(no_of_applicants),.groups='drop') 
wday_summary %>%
  ggplot(aes(x =factor(WeekOfDay,level=level_orderD) ,y = no_of_applicants, fill = WeekOfDay)) +
  geom_col() +
  scale_fill_hp_d(option = "HermioneGranger") +
  scale_y_continuous(limits = c(0, 40), expand = c(0,0)) +
  labs(title = "Avg applicants by the day of the week",x = "Days of Week")

On average, Monday and Wednesday get more number of applicants; therefore requiring a greater allocation of manpower on those days.
level_orderD<-c("Mon","Tue","Wed","Thu","Fri","Sat","Sun") 
level_orderM<-c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec") 
wday_month_summary <- df_segment1 %>% 
  group_by(WeekOfDay, Month) %>%
  summarise(mean_applicants = mean(no_of_applicants),.groups='drop')
wday_month_summary %>%
  ggplot(aes(x = factor(WeekOfDay,level=level_orderD), y = mean_applicants, fill = WeekOfDay)) +
  geom_col(width =1)+
 scale_fill_hp_d(option = "DracoMalfoy") +
  facet_wrap(~Month,dir="v",ncol=3,nrow= 4,as.table = TRUE) +
  scale_y_continuous(limits = c(0, 80), expand = c(0,0)) +
  labs(title = "Avg applicants though the week in every  month", 
       x = "Week Of Day", y = "total applicants" ) 

As is consistent with the previous observations, Oct appears to be busier than other months.
#segment1
level_orderY<-c("2017","2018","2019","2020") 
level_orderM<-c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec") 
wday_month_summary <- df_segment1 %>% 
  group_by(Year, Month) %>%
  summarise(mean_applicants = mean(no_of_applicants),.groups='drop')
wday_month_summary %>%
  ggplot(aes(x = factor(Month,level=level_orderM), y = mean_applicants, fill = Month)) +
  geom_col(width =1)+
 scale_fill_hp_d(option = "Ravenclaw") +
  facet_wrap(~Year,dir="h",ncol=1 ,as.table = TRUE) +
  scale_y_continuous(limits = c(0, 70), expand = c(0,0)) +
  labs(title = "Avg applicants each month  across the Year in Segment1", 
     x = "Month Of Year ", y = "total applicants" ) 

The above graph depict that number of applicants in each month w.r.t year. In 2018,every months has applicant’s contirbution as copare to 2017 and 2019. In 2017 there wasnt any applcant from the first quarter. In 2019 there wasnt application contribution from last two quarters.Over the years, more applicants in month of October , November and March.
#segment2
wday_month_summary <- df_segment2 %>% 
  group_by(Year, Month) %>%
  summarise(mean_applicants = mean(no_of_applicants),.groups='drop')
wday_month_summary %>%
  ggplot(aes(x = factor(Month,level=level_orderM), y = mean_applicants, fill = Month)) +
  geom_col(width =1)+
 scale_fill_hp_d(option = "Hufflepuff") +
  facet_wrap(~Year,dir="h",ncol=1 ,as.table = TRUE) +
  scale_y_continuous(limits = c(0, 1500), expand = c(0,0)) +
  labs(title = "Avg applicants each month  across the Year in Segment2", 
       x = "Month Of Year ", y = "total applicants" ) 

In segment2, Number of applicant increased significantly.In 2018, number of contributer in each month about same.